##Assignment 2
##Read Data from file and load libraries
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.0 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load the data set. read.table() is left at its default header = FALSE, so
# the columns get the automatic names V1, V2, ... that the code below uses.
df <- read.table(file = "SENIC.txt")
#' Find the positions of outliers in one column using Tukey's 1.5 * IQR rule.
#'
#' A value is an outlier when it lies strictly above Q3 + 1.5 * (Q3 - Q1) or
#' strictly below Q1 - 1.5 * (Q3 - Q1), where Q1 and Q3 are the first and
#' third quartiles of the column. Missing values are ignored when computing
#' the quartiles and are never reported as outliers.
#'
#' @param x A data frame (or list) holding the column.
#' @param name Name (or position) of the column to inspect; passed to `[[`.
#' @return Integer vector of positions in `x[[name]]` that are outliers,
#'   `integer(0)` when there are none.
my_func <- function(x, name) {
  col <- x[[name]]
  # na.rm = TRUE: quantile() stops with an error on missing values otherwise.
  # names = FALSE keeps the "25%"/"75%" labels out of the result.
  quartiles <- quantile(col, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  fence_width <- 1.5 * (quartiles[2] - quartiles[1])
  upper_fence <- quartiles[2] + fence_width
  lower_fence <- quartiles[1] - fence_width
  # Strict comparisons: a value sitting exactly on a fence is not an outlier,
  # so a constant column (IQR of 0) yields no outliers instead of every row.
  # which() drops the NA comparisons produced by missing values.
  which(col > upper_fence | col < lower_fence)
}
## Density plot of Infection Risk
# df keeps read.table's automatic column names. Further down this script
# drops columns 1, 8 and 9 and names the rest LengthofStay, Age,
# InfectionRisk, ..., which makes infection risk the 4th column (V4); V3 is
# age.
indices_infection <- my_func(df, "V4")
outliers_infection <- df$V4[indices_infection]
# Outliers are drawn as markers sitting on the x-axis, hence y = 0.
outliers <- tibble(x = outliers_infection, y = 0)
g <- ggplot(df, aes(x = V4)) +
  geom_histogram(
    aes(y = ..density..),
    bins = 50, alpha = 0.7, fill = "#6666FF"
  ) +
  geom_density(col = "#330066")
g <- g +
  geom_point(
    data = outliers, aes(x, y),
    size = 2, colour = "#00CCFF", pch = 23, fill = "#00CCFF"
  ) +
  ggtitle("Density & Histogram Plot of Infection Risk") +
  labs(x = "Infection Risk", y = "Density")
g <- g + theme(plot.title = element_text(color = "#666666", size = 21, hjust = 0))
# Anchor the note to the top-right corner of the panel (x = Inf, y = Inf)
# instead of fixed data coordinates, so it never stretches the axes.
g + annotate(
  geom = "text", x = Inf, y = Inf, hjust = 1.05, vjust = 1.5,
  label = "Diamond points \n represent outliers"
)
## Produce graphs for all other variables
# Drop columns 1, 8 and 9 and give the remaining columns readable names.
dt <- df[-c(1, 8, 9)]
names(dt) <- c(
  "LengthofStay", "Age", "InfectionRisk", "RoutineCulturingRatio",
  "RoutineChestXrayRatio", "NumberofBeds", "AverageDailyCensus",
  "NumberofNurses", "AvailableFacilitiesServices"
)
# Build one density + histogram plot per column, with that column's outliers
# (as found by my_func) marked on the x-axis. Each plot is built inside a
# function so the per-variable temporaries stay local and do not overwrite
# script-level objects such as `outliers`.
myplots <- lapply(names(dt), function(var_name) {
  outlier_values <- dt[[var_name]][my_func(dt, var_name)]
  outlier_points <- tibble(x = outlier_values, y = 0)
  ggplot(dt, aes_string(x = var_name)) +
    geom_density(col = "#330066") +
    geom_histogram(
      aes(y = ..density..),
      bins = 50, alpha = 0.7, fill = "#6666FF"
    ) +
    geom_point(
      data = outlier_points, aes(x, y),
      size = 2, color = "#00CCFF", pch = 23, fill = "#00CCFF"
    )
})
# Keep the list addressable by variable name, e.g. myplots[["Age"]].
names(myplots) <- names(dt)
grid.arrange(grobs = myplots)
## Scatter plot
# Column choice follows the renaming used elsewhere in this script (columns
# 1, 8 and 9 dropped, the rest named in order): V4 is InfectionRisk, V11 is
# NumberofNurses and V7 is NumberofBeds.
ggplot(df, aes(y = V11, x = V4, col = V7)) +
  geom_point(size = 5) +
  ggtitle(
    "Scatter Plot of Infection Risk & Number of Nurses colored by Number of Beds"
  ) +
  labs(x = "Infection Risk", y = "Number of Nurses", colour = "Number of Beds")
##Plotly Graph
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Infection risk is column V4 of df (V3 is age, per the column names this
# script assigns after dropping columns 1, 8 and 9).
indices_infection <- my_func(df, "V4")
outliers_infection <- df$V4[indices_infection]
# Outliers are drawn as points on the x-axis, hence y = 0.
outliers <- tibble(x = outliers_infection, y = 0)
# alpha is a fixed setting, so it belongs outside aes(); inside aes() it is
# treated as a mapped variable and produces a spurious legend.
# bins = 30 spells out the value stat_bin() was previously picking by default.
plot <- ggplot(df, aes(V4)) +
  geom_histogram(
    aes(y = ..density..),
    bins = 30, alpha = 0.7, col = "coral", fill = "green"
  ) +
  geom_density(col = "blue") +
  ggtitle("Density with Histogram overlay") +
  geom_point(data = outliers, aes(x, y), size = 3)
# Convert the static ggplot into an interactive plotly widget and show it.
p <- ggplotly(plot)
p
## Plotly plot made with the pipe operator
# Infection risk is column V4 of df (V3 is age, per the column names this
# script assigns after dropping columns 1, 8 and 9).
Outlier_indices <- my_func(df, "V4")
Outlier_values <- df$V4[Outlier_indices]
# One y = 0 per outlier so the markers sit on the x-axis of the histogram.
yValue <- rep(0, length(Outlier_values))
hisPlot <- df %>%
  select(V4) %>%
  plot_ly(x = ~V4, type = "histogram") %>%
  add_markers(x = ~Outlier_values, y = ~yValue)
hisPlot
##Shiny App